# Daily weather for three NOAA stations, 2021-01-01 through 2022-12-31.
# Adds a readable `name` for each station id and divides tmin/tmax by 10
# (presumably converting tenths of a degree to degrees C -- the plots below
# label these values in C; confirm against the GHCND documentation).
weather_df <-
  rnoaa::meteo_pull_monitors(
    c("USW00094728", "USW00022534", "USS0023B17S"),
    var = c("PRCP", "TMIN", "TMAX"),
    date_min = "2021-01-01",
    date_max = "2022-12-31"
  ) |>
  # Map each station id to a human-readable location label.
  mutate(
    name = case_match(
      id,
      "USW00094728" ~ "CentralPark_NY",
      "USW00022534" ~ "Molokai_HI",
      "USS0023B17S" ~ "Waterhole_WA"
    )
  ) |>
  # Rescale both temperature columns.
  mutate(
    tmin = tmin / 10,
    tmax = tmax / 10
  ) |>
  # Put the identifying columns first, keep everything else after them.
  select(name, id, everything())
## using cached file: /Users/zhangshizhe/Library/Caches/org.R-project.R/R/rnoaa/noaa_ghcnd/USW00094728.dly
## date created (size, mb): 2024-09-26 10:23:14.171516 (8.651)
## file min/max dates: 1869-01-01 / 2024-09-30
## using cached file: /Users/zhangshizhe/Library/Caches/org.R-project.R/R/rnoaa/noaa_ghcnd/USW00022534.dly
## date created (size, mb): 2024-09-26 10:23:25.989412 (3.932)
## file min/max dates: 1949-10-01 / 2024-09-30
## using cached file: /Users/zhangshizhe/Library/Caches/org.R-project.R/R/rnoaa/noaa_ghcnd/USS0023B17S.dly
## date created (size, mb): 2024-09-26 10:23:29.76067 (1.036)
## file min/max dates: 1999-09-01 / 2024-09-30
# Basic scatterplot: daily tmax against tmin, one color per station.
ggplot(weather_df, aes(x = tmin, y = tmax)) +
  geom_point(aes(color = name), alpha = 0.5)
## Warning: Removed 17 rows containing missing values or values outside the scale range
## (`geom_point()`).
Make a scatterplot, but fancy
# Same scatterplot as above, now with a title, axis labels, a legend title
# and a caption supplied through labs().
weather_df |>
  ggplot(aes(x = tmin, y = tmax)) +
  geom_point(aes(color = name), alpha = .5) +
  labs(
    title = "Temperature plot",
    x = "Minimum daily temperature (C)",
    # Fixed spelling of the axis label ("Maxiumum" -> "Maximum").
    y = "Maximum daily temperature (C)",
    color = "Location",
    caption = "Data from the rnoaa package"
  )
## Warning: Removed 17 rows containing missing values or values outside the scale range
## (`geom_point()`).
Scales –
# Labelled scatterplot with customised axis scales.
weather_df |>
  ggplot(aes(x = tmin, y = tmax)) +
  geom_point(aes(color = name), alpha = .5) +
  labs(
    title = "Temperature plot",
    x = "Minimum daily temperature (C)",
    # Fixed spelling of the axis label ("Maxiumum" -> "Maximum").
    y = "Maximum daily temperature (C)",
    color = "Location",
    caption = "Data from the rnoaa package"
  ) +
  # Only three tick marks on the x axis, each with a hand-written label.
  scale_x_continuous(
    breaks = c(-15, 0, 20),
    labels = c("-15C", "0", "20")
  ) +
  # Square-root y axis restricted to [0, 30]. sqrt() is undefined for
  # negative tmax, so those observations become NaN, and points outside the
  # limits are dropped as well -- both are reported as warnings when the
  # plot is drawn.
  scale_y_continuous(
    limits = c(0, 30),
    transform = "sqrt"
  )
## Warning in transformation$transform(x): NaNs produced
## Warning in scale_y_continuous(limits = c(0, 30), transform = "sqrt"): sqrt
## transformation introduced infinite values.
## Warning: Removed 302 rows containing missing values or values outside the scale range
## (`geom_point()`).
Look at color
# Reusable labelled scatterplot with a discrete viridis color palette.
# The earlier scale_color_hue(h = c(100, 300)) call was removed: a plot can
# hold only one colour scale, so the viridis scale added after it replaced it
# entirely and ggplot2 emitted "Scale for colour is already present".
ggp_scatterplot = weather_df |>
  ggplot(aes(x = tmin, y = tmax)) +
  geom_point(aes(color = name), alpha = .5) +
  labs(
    title = "Temperature plot",
    x = "Minimum daily temperature (C)",
    # Fixed spelling of the axis label ("Maxiumum" -> "Maximum").
    y = "Maximum daily temperature (C)",
    color = "Location",
    caption = "Data from the rnoaa package") +
  viridis::scale_color_viridis(discrete = TRUE)
Themes
# Default theme; only the legend is moved below the panel.
ggp_scatterplot + theme(legend.position = "bottom")
## Warning: Removed 17 rows containing missing values or values outside the scale range
## (`geom_point()`).
# Black-and-white preset first, then the legend tweak layered on top of it.
ggp_scatterplot +
  theme_bw() + theme(legend.position = "bottom")
## Warning: Removed 17 rows containing missing values or values outside the scale range
## (`geom_point()`).
# Minimal preset with the legend at the bottom.
# Order matters: theme_minimal() is a complete theme, and adding a complete
# theme replaces every theme setting made before it. The legend tweak
# therefore has to come after the preset, otherwise it is silently discarded.
ggp_scatterplot +
  theme_minimal() +
  theme(legend.position = "bottom")
## Warning: Removed 17 rows containing missing values or values outside the scale range
## (`geom_point()`).
# Classic preset with the legend at the bottom.
# Order matters: theme_classic() is a complete theme, and adding a complete
# theme replaces every theme setting made before it. The legend tweak
# therefore has to come after the preset, otherwise it is silently discarded.
ggp_scatterplot +
  theme_classic() +
  theme(legend.position = "bottom")
## Warning: Removed 17 rows containing missing values or values outside the scale range
## (`geom_point()`).
# Excel-style preset from ggthemes with the legend at the bottom.
# The legend tweak is applied last so the preset theme cannot override it
# (a preset added after theme() can replace the earlier setting).
ggp_scatterplot +
  ggthemes::theme_excel() +
  theme(legend.position = "bottom")
## Warning: Removed 17 rows containing missing values or values outside the scale range
## (`geom_point()`).
Learning assessment
# Maximum temperature over time for each station: a smoothed trend line
# (no confidence band) plus the daily points, sized by precipitation.
ggplot(weather_df, aes(x = date, y = tmax, color = name)) +
  geom_smooth(se = FALSE) +
  geom_point(aes(size = prcp), alpha = .75) +
  labs(
    title = "Temperature plot",
    x = "Date",
    # Fixed spelling of the axis label ("Maxiumum" -> "Maximum").
    y = "Maximum daily temperature (C)",
    color = "Location",
    caption = "Data from the rnoaa package"
  ) +
  viridis::scale_color_viridis(discrete = TRUE) +
  # Preset theme first, then the legend tweak, so the tweak is kept.
  theme_minimal() +
  theme(legend.position = "bottom")
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
## Warning: Removed 17 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 19 rows containing missing values or values outside the scale range
## (`geom_point()`).
Extra bonus in ggplot: use different datasets in different geoms
# Rows of weather_df belonging to the Central Park station only.
central_park_df <- filter(weather_df, name == "CentralPark_NY")
# Rows of weather_df belonging to the Molokai station only.
molokai_df <- filter(weather_df, name == "Molokai_HI")
# One plot, two data sources: the points are drawn from molokai_df (the
# plot's default data), while the line layer is given central_park_df
# through its own `data` argument. Both layers share the same aesthetics.
ggplot(molokai_df, aes(x = date, y = tmax, color = name)) +
  geom_point() +
  geom_line(data = central_park_df)
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).
Multiple panels
# Density of tmax, one panel per station laid out in columns.
ggplot(weather_df, aes(x = tmax, fill = name)) +
  geom_density() +
  facet_grid(. ~ name)
## Warning: Removed 17 rows containing non-finite outside the scale range
## (`stat_density()`).
Combining plots with patchwork
# Panel 1: tmax against tmin, colored by station.
ggp_tmax_tmin <-
  ggplot(weather_df, aes(x = tmin, y = tmax, color = name)) +
  geom_point(alpha = 0.3)
# Panel 2: overlaid tmax densities, filled by station.
ggp_tmax_density <-
  ggplot(weather_df, aes(x = tmax, fill = name)) +
  geom_density(alpha = 0.3)
# Panel 3: tmax over time with a smoothed trend (no confidence band).
ggp_tmax_date <-
  ggplot(weather_df, aes(x = date, y = tmax, color = name)) +
  geom_point(alpha = 0.3) +
  geom_smooth(se = FALSE)
# Compose the three panels: scatter and density side by side on top, the
# time series underneath. Relies on plot-composition operators for ggplot
# objects (presumably provided by the patchwork package -- confirm it is
# loaded earlier in the file).
(ggp_tmax_tmin + ggp_tmax_density) / ggp_tmax_date
## Warning: Removed 17 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 17 rows containing non-finite outside the scale range
## (`stat_density()`).
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
## Warning: Removed 17 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 17 rows containing missing values or values outside the scale range
## (`geom_point()`).
Data manipulation
# Violin plot of tmax per station, with the stations put in an explicit
# order (Molokai, Central Park, Waterhole) by releveling `name`.
weather_df |>
  mutate(
    name = forcats::fct_relevel(
      name,
      c("Molokai_HI", "CentralPark_NY", "Waterhole_WA")
    )
  ) |>
  ggplot(aes(x = name, y = tmax)) +
  geom_violin(aes(fill = name), color = "blue", alpha = 0.5) +
  theme(legend.position = "bottom")
## Warning: Removed 17 rows containing non-finite outside the scale range
## (`stat_ydensity()`).
# Stack tmax and tmin into a single `temp` column (with `observation`
# recording which one each value came from) so both distributions can be
# drawn on the same axis, one panel per station.
weather_df |>
  select(name, tmax, tmin) |>
  pivot_longer(
    cols = tmax:tmin,
    names_to = "observation",
    values_to = "temp"
  ) |>
  ggplot(aes(x = temp, fill = observation)) +
  geom_density(alpha = 0.5) +
  facet_grid(. ~ name) +
  viridis::scale_fill_viridis(discrete = TRUE)
## Warning: Removed 34 rows containing non-finite outside the scale range
## (`stat_density()`).
Pulse data next
# PULSE data in long format: one row per subject per visit, with the BDI
# score in `bdi` and the visit as an ordered-by-time factor.
pulse_data =
  haven::read_sas("./public_pulse_data.sas7bdat") |>
  janitor::clean_names() |>
  # bdi_score_bl ... bdi_score_12m become rows; the shared prefix is
  # stripped so `visit` holds just "bl", "01m", "06m", "12m".
  pivot_longer(
    bdi_score_bl:bdi_score_12m,
    names_to = "visit",
    names_prefix = "bdi_score_",
    values_to = "bdi") |>
  select(id, visit, everything()) |>
  mutate(
    # Rename the baseline visit so it matches the "NNm" pattern of the
    # others. case_match() replaces the superseded recode() and is the same
    # helper used elsewhere in this file; `.default = visit` leaves every
    # other value untouched.
    visit = case_match(visit, "bl" ~ "00m", .default = visit),
    # Explicit levels so visits are ordered chronologically.
    visit = factor(visit, levels = c("00m", "01m", "06m", "12m"))) |>
  arrange(id, visit)
# Distribution of BDI scores at each visit.
pulse_data |>
  ggplot(aes(x = visit, y = bdi)) +
  geom_boxplot()
## Warning: Removed 879 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
# Pup-level FAS data. "NA", "." and the empty string are all read as
# missing; the numeric sex code is translated to a label (1 = male,
# 2 = female; any other code becomes NA because no default is given).
pup_data <-
  read_csv("./FAS_pups.csv", na = c("NA", ".", "")) |>
  janitor::clean_names() |>
  mutate(sex = case_match(sex, 1 ~ "male", 2 ~ "female"))
## Rows: 313 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): Litter Number
## dbl (5): Sex, PD ears, PD eyes, PD pivot, PD walk
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Litter-level FAS data. "NA", "." and the empty string are all read as
# missing. `group` is split by position: the first three characters go to
# `dose`, the remainder to `day_of_tx`.
litter_data <-
  read_csv("./FAS_litters.csv", na = c("NA", ".", "")) |>
  janitor::clean_names() |>
  separate(
    group,
    into = c("dose", "day_of_tx"),
    sep = 3
  )
## Rows: 49 Columns: 8
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): Group, Litter Number
## dbl (6): GD0 weight, GD18 weight, GD of Birth, Pups born alive, Pups dead @ ...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Attach litter-level information to every pup; pups without a matching
# litter are kept (left join) with NA in the litter columns.
fas_data <-
  pup_data |>
  left_join(litter_data, by = "litter_number")
# Postnatal day on which each developmental outcome was reached, by dose.
# The four pd_* columns are stacked into outcome / pn_day pairs, rows with
# any missing value are dropped, and outcomes are ordered by their median
# day so the facet columns run from earliest to latest.
fas_data |>
  select(sex, dose, day_of_tx, pd_ears:pd_walk) |>
  pivot_longer(
    cols = pd_ears:pd_walk,
    names_to = "outcome",
    values_to = "pn_day"
  ) |>
  drop_na() |>
  mutate(
    outcome = forcats::fct_reorder(outcome, pn_day, .fun = median)
  ) |>
  ggplot(aes(x = dose, y = pn_day)) +
  geom_violin() +
  # Rows: day of treatment; columns: outcome.
  facet_grid(day_of_tx ~ outcome)